#=
dqmc的核心内容
=#

"""
计算
Gtt(i,j) = ⟨ c_i(t) c_j(t) ⟩
"""
function eq_green_scratch(ss::ScrollSVD{T}) where T
    siz = size(ss.B[end])
    ptr = findfirst(ss.L)
    if isnothing(ptr)
        VL = Diagonal(ones(siz[1]))
        DL = VL
        UL = VL
        UR, DR, VR = ss.F[end].U, Diagonal(ss.F[end].S), ss.F[end].Vt
    elseif ptr == 1
        VL, DL, UL = ss.F[1].U, Diagonal(ss.F[1].S), ss.F[1].Vt
        UR = Diagonal(ones(siz[1]))
        DR = UR
        VR = UR
        ##上面这个数值稳定性会突然出问题
        ##用下面这个
        ##VL DL UL UR=I DR=I VR=I -> I I I UR=VL DR=DL VR=UL
        #VL = Diagonal(ones(siz[1]))
        #DL = VL
        #UL = VL
        #UR, DR, VR = ss.F[1].U, Diagonal(ss.F[1].S), ss.F[1].Vt
    else
        VL, DL, UL = ss.F[ptr].U, Diagonal(ss.F[ptr].S), ss.F[ptr].Vt
        UR, DR, VR = ss.F[ptr-1].U, Diagonal(ss.F[ptr-1].S), ss.F[ptr-1].Vt
    end
    #gtt = inv(Diagonal(ones(siz[1]))+UR*DR*VR*VL*DL*UL)
    #M = inv(UL*UR) + DR*(VR*VL)*DL
    #Fm = svd(M)
    #gtt = inv(Fm.Vt*UL)*inv(Diagonal(Fm.S))*inv(UR*Fm.U)
    #
    DLS = Diagonal(ones(Float64, siz[1]))
    DLB = Diagonal(ones(Float64, siz[1]))
    DRS = Diagonal(ones(Float64, siz[1]))
    DRB = Diagonal(ones(Float64, siz[1]))
    for i in Base.OneTo(siz[1])
        if DL[i, i] > 1.0
            DLB[i, i] = DL[i, i]
            DLS[i, i] = 1.0
        else
            DLS[i, i] = DL[i, i]
            DLB[i, i] = 1.0
        end
        if DR[i, i] > 1.0
            DRB[i, i] = DR[i, i]
            DRS[i, i] = 1.0
        else
            DRS[i, i] = DR[i, i]
            DRB[i, i] = 1.0
        end
    end
    #
    M = inv(DRB)*adjoint(UL*UR)*inv(DLB) + DRS*(VR*VL)*DLS
    Fm = svd(M, alg=LinearAlgebra.QRIteration())
    ML = adjoint(UL)*inv(DLB)*adjoint(Fm.Vt)
    MR = adjoint(Fm.U)*inv(DRB)*adjoint(UR)
    gtt = ML*inv(Diagonal(Fm.S))*MR
    #增加计算phase
    phase = det(Fm.Vt*UL)*det(UR*Fm.U)
    phase = phase / abs(phase)
    return gtt, phase
end


"""
当ss中所有都在R里面的时候，进行计算，算完以后就全都到L中
从G(beta,beta)开始
"""
function ueq_green_scratch(ss::ScrollSVD{T}) where T
    siz = size(ss.B[end])
    ptr = findfirst(ss.L)
    if isnothing(ptr)
        VL = Diagonal(ones(siz[1]))
        DL = VL
        UL = VL
        UR, DR, VR = ss.F[end].U, Diagonal(ss.F[end].S), ss.F[end].Vt
    elseif ptr == 1
        VL, DL, UL = ss.F[ptr].U, Diagonal(ss.F[ptr].S), ss.F[ptr].Vt
        UR = Diagonal(ones(siz[1]))
        DR = UR
        VR = UR
        #VL = Diagonal(ones(siz[1]))
        #DL = VL
        #UL = VL
        #UR, DR, VR = ss.F[1].U, Diagonal(ss.F[1].S), ss.F[1].Vt
    else
        VL, DL, UL = ss.F[ptr].U, Diagonal(ss.F[ptr].S), ss.F[ptr].Vt
        UR, DR, VR = ss.F[ptr-1].U, Diagonal(ss.F[ptr-1].S), ss.F[ptr-1].Vt
    end
    #
    DLS = Diagonal(ones(Float64, siz[1]))
    DLB = Diagonal(ones(Float64, siz[1]))
    DRS = Diagonal(ones(Float64, siz[1]))
    DRB = Diagonal(ones(Float64, siz[1]))
    for i in Base.OneTo(siz[1])
        if DL[i, i] > 1.0
            DLB[i, i] = DL[i, i]
            DLS[i, i] = 1.0
        else
            DLS[i, i] = DL[i, i]
            DLB[i, i] = 1.0
        end
        if DR[i, i] > 1.0
            DRB[i, i] = DR[i, i]
            DRS[i, i] = 1.0
        else
            DRS[i, i] = DR[i, i]
            DRB[i, i] = 1.0
        end
    end
    #G(0,t) = -<c+(t) c(0)>
    #G(t,0) = <c(t) c+(0)>
    #M = adjoint(UL*UR) + DR*(VR*VL)*DL
    M = inv(DRB)*adjoint(UL*UR)*inv(DLB) + DRS*(VR*VL)*DLS
    Fm = svd(M, alg=LinearAlgebra.QRIteration())
    #gt0 = UL^-1 [U D Vt]^-1 DR VR
    #    = (VtUL)^-1 D^-1 U^-1 DR VR
    #g0t = - VLDL [U D Vt]^-1 UR^-1
    #    = - VLDL Vt^-1 D^-1 (URU)^-1
    Di = inv(Diagonal(Fm.S))
    ML = adjoint(UL)*inv(DLB)*adjoint(Fm.Vt)
    MR = adjoint(Fm.U)*inv(DRB)*DR*VR
    gt0 = ML*Di*MR
    ML = VL*DL*inv(DLB)*adjoint(Fm.Vt)
    MR = adjoint(Fm.U)*inv(DRB)*adjoint(UR)
    g0t = ML*Di*MR
    return -g0t, gt0
end

"""
计算Gtt = G(iNt, iNt), 需要的是Nt+1位置上的B
"""
function down_prog_green(gf::Matrix{T}, bmat::Matrix{T}) where T
    return inv(bmat)*gf*bmat
end

"""
计算Gtt，需要的是Nt位置上的B
"""
function up_prog_green(gf::Matrix{T}, bmat::Matrix{T}) where T
    return bmat*gf*inv(bmat)
end



"""
每次更新一个数值
"""
function ShermanMorrison(del::Diagonal{T}, gf::Matrix{T}) where T
    siz = size(del)
    for i in Base.OneTo(siz[1])
        if isapprox(del[i, i], 0)
            continue
        end
        Ipgf = Diagonal(ones(T, siz[1]))-gf
        tmp = del[i, i]*Ipgf[i, :]
        gf = gf - gf[:, i]*tmp'/(1+tmp[i])
    end
    return gf
end


"""
每次更新一个小矩阵，idx是矩阵对应的位置
"""
function PressShermanMorrison!(idx::Vector{Int}, del::Matrix{T}, gf::Matrix{T}) where T
    siz = size(gf)
    #U = zeros(T, siz[1], length(idx))
    V = zeros(T, length(idx), siz[1])
    #for x in Base.OneTo(length(idx))
    #    i = idx[x]
    #    U[i, :] = del[x, :]
    #end
    for x in Base.OneTo(length(idx))
        i = idx[x]
        @. V[x, :] = -gf[i, :]
        V[x, i] += 1.0
    end
    #这个矩阵很小，不需要太多优化
    indk = Diagonal(ones(T, length(idx)))
    #原本是
    #den = inv(indk + V*U)
    #gf = gf - gf*U*den*V
    #1.在小矩阵上加负号，加速减少copy
    #den = -inv(indk + V*U)
    #imden = U*den*V
    #for i in Base.OneTo(siz[1])
    #    imden[i, i] += 1.0
    #end
    #return gf*imden
    #std = gf*imden
    #2.减少矩阵大小
    #gfv = @view gf[:, idx]
    #vsl = @view V[:, idx]
    #den = inv(indk + vsl*del)
    #den = gfv*del*den*V
    ##return gf - den
    ##直接return会allocate
    #@. gf = gf - den
    #return gf
    #3.利用1 rank update
    gfv = @view gf[:, idx]
    vsl = @view V[:, idx]
    den = inv(indk + vsl*del)
    #先乘小矩阵
    leftm = gfv*(del*den)
    #rghtm = V', @view adjoint 和ger！在一起有问题
    V .= adjoint.(V)
    for i in Base.OneTo(length(idx))
        lv = @view leftm[:, i]
        rv = @view V[i, :]
        #gf = gf - 1.0 * lv * adjoint(rv)
        LinearAlgebra.BLAS.ger!(T(-1.0), lv, rv, gf)
    end
    #if !all(isapprox.(std, gf, atol=1e-9))
    #    println(maximum(abs.(std - gf)))
    #    @assert false
    #end
    return gf
end


"""
求dmat导致的行列式
"""
function Woodbury(del::Diagonal{T}, gf::Matrix{T}) where T
    siz = size(del)
    ind = Diagonal(ones(T, siz[1]))
    return det(ind + del*(ind - gf))
end

"""
如果dmat是个稀疏且非对角
"""
function Woodbury(idx::Vector{Int}, del::Matrix{T}, gf::Matrix{T}) where T
    siz = size(gf)
    #ind = Diagonal(ones(T, siz[1]))
    U = zeros(T, siz[1], length(idx))
    V = zeros(T, length(idx), siz[1])
    for x in Base.OneTo(length(idx))
        i = idx[x]
        U[i, :] = del[x, :]
    end
    for x in Base.OneTo(length(idx))
        i = idx[x]
        #减少右侧的allocate
        @. V[x, :] = -gf[i, :]
        V[x, i] += 1.0
    end
    #原本是
    #return det(ind + U*V)
    #实际上det(I(2*Nx) + U*V) = det(I(length(idx)) + V*U)
    #后者矩阵维度小，det快得多
    ind = Diagonal(ones(T, length(idx)))
    return det(ind + V*U)
    #进一步优化U的占用，优化的程度不是很大
    #V中只有idx列有用：vsl = @view V[:, idx]
    #vsl = zeros(T, length(idx), length(idx))
    #for x in Base.OneTo(length(idx))
    #    i1 = idx[x]
    #    for y in Base.OneTo(length(idx))
    #        i2 = idx[y]
    #        vsl[x, y] = -gf[i1, i2]
    #    end
    #    vsl[x, x] += 1.0
    #end
    #ind = Diagonal(ones(T, length(idx)))
    #return det(ind + vsl*del)
end


"""
记录经过了几次sweep
"""
function qmcstatus(Nt, allbidxs=nothing, isR2L=nothing)
    if isnothing(allbidxs)
        allbidxs = reverse(Vector(1:1:Nt))
        isR2L = true
        return allbidxs, isR2L
    end
    if isR2L
        allbidxs = vcat(allbidxs[2:end], allbidxs[1])
    else
        allbidxs = vcat(allbidxs[end], allbidxs[1:end-1])    
    end
    if allbidxs[end] == 1
        isR2L = !isR2L
    end
    return allbidxs, isR2L
end


"""
初始化ss
"""
function initialize_SS(Nt, Ng, sp, hscfg, nx, ny, nz)
    sslen = Int(Nt//Ng)
    bmats2 = Vector{Matrix{ComplexF64}}(undef, Ng)
    allbmats2 = Vector{Matrix{ComplexF64}}(undef, Nt)
    for bi in Base.OneTo(Ng)
        cfg = hscfg[bi, :]
        ehk, Ui = unpack_splitting(sp, bi)
        bmats2[bi] = bmat_IsingND(ehk, cfg, Ui, nx, ny, nz, sp.dt)
        allbmats2[bi] = bmats2[bi]
    end
    ss2 = ScrollSVD(bmats2)
    for si in Base.OneTo(sslen-1)
        for bi in Base.OneTo(Ng)
            cfg = hscfg[bi+si*Ng, :]
            ehk, Ui = unpack_splitting(sp, bi+si*Ng)
            bmats2[bi] = bmat_IsingND(ehk, cfg, Ui, nx, ny, nz, sp.dt)
            allbmats2[bi+si*Ng] = bmats2[bi]
        end
        push!(ss2, bmats2)
    end
    return sslen, bmats2, allbmats2, ss2
end


"""
DQMC计算平均的符号
"""
function dqmc_step(Nt, Ng, sp, hscfg, nx, ny, nz, sslen, bmats, allbmats, ss)
    Nx = length(nx)
    #
    gf, ph = eq_green_scratch(ss)
    allbidxs, isR2L = qmcstatus(Nt)
    #R->L
    for si in Base.OneTo(sslen)
        for bi in Base.OneTo(Ng)
            tau = Nt - (Ng*(si-1)+bi) + 1
            cfg = hscfg[tau, :]
            ehk, Ui = unpack_splitting(sp, tau)
            for xi in Base.OneTo(Nx)
                if abs(Ui[xi]) < 1e-6
                    continue
                end
                sp1 = cfg[xi]
                #println(tau, " ", xi, " ", sp1)
                idx, dmat = Δmat_IsingND(length(cfg), xi, -sp1, sp1,
                Ui[xi], nx[xi], ny[xi], nz[xi], sp.dt)
                #
                ratio = Woodbury(idx, dmat, gf)
                ##如果需要update
                if rand() < abs(ratio)
                    PressShermanMorrison!(idx, dmat, gf)
                    ph = ph*ratio/abs(ratio)
                    cfg[xi] = -sp1
                    hscfg[tau, xi] = -sp1
                end
            end
            #重新计算更新过的
            bmats[Ng-bi+1] = bmat_IsingND(ehk, cfg, Ui, nx, ny, nz, sp.dt)
            allbmats[tau] = bmats[Ng-bi+1]
            gf = down_prog_green(gf, allbmats[tau])
            #
            allbidxs, isR2L = qmcstatus(Nt, allbidxs, isR2L)
            #println(allbidxs, isR2L)
        end
        scrollR2L(ss, bmats)
        #oldgf, oldph = gf, ph
        newgf, newph = eq_green_scratch(ss)
        #println(si*Ng, " ", maximum(abs.(gf - newgf)))
        #println(gf - oldgf)
        #println(newph, " ", ph, " ", abs(newph - ph))
        #@assert all(isapprox.(gf, oldgf, atol=1e-6))
        #@assert isapprox(oldph, ph, atol=1e-6)
        @← gf newgf
        @← ph newph
    end
    #L->R
    for si in Base.OneTo(sslen)
        for bi in Base.OneTo(Ng)
            tau = Ng*(si-1)+bi
            cfg = hscfg[tau, :]
            ehk, Ui = unpack_splitting(sp, tau)
            gf = up_prog_green(gf, allbmats[tau])
            for xi in Base.OneTo(Nx)
                if abs(Ui[xi]) < 1e-6
                    continue
                end
                sp1 = cfg[xi]
                idx, dmat = Δmat_IsingND(length(cfg), xi, -sp1, sp1,
                Ui[xi], nx[xi], ny[xi], nz[xi], sp.dt)
                #
                ratio = Woodbury(idx, dmat, gf)
                ##如果需要update
                if rand() < abs(ratio)
                    PressShermanMorrison!(idx, dmat, gf)
                    ph = ph*ratio/abs(ratio)
                    cfg[xi] = -sp1
                    hscfg[tau, xi] = -sp1
                end
            end
            #重新计算B矩阵
            bmats[bi] = bmat_IsingND(ehk, cfg, Ui, nx, ny, nz, sp.dt)
            allbmats[tau] = bmats[bi]
            allbidxs, isR2L = qmcstatus(Nt, allbidxs, isR2L)
            #println(allbidxs, isR2L)
        end
        scrollL2R(ss, bmats)
        #oldgf, oldph = gf, ph
        newgf, newph = eq_green_scratch(ss)
        #println(si*Ng, " ", maximum(abs.(gf - newgf)))
        #println(newph, " ", ph, " ", abs(newph - ph))
        @← gf newgf
        @← ph newph
        #@assert all(isapprox.(gf, oldgf, atol=1e-6))
        #@assert isapprox(oldph, ph, atol=1e-6)
    end
    return hscfg, bmats, allbmats, ss, gf, ph
end


"""
非等时格林函数
"""
function ueq_green_func(Nt, Ng, Nx, sslen, bmats, allbmats, ss)
    if any(ss.L)
        throw(error("has L"))
    end
    Gf0t = zeros(ComplexF64, Nt, 2Nx, 2Nx)
    Gft0 = zeros(ComplexF64, Nt, 2Nx, 2Nx)
    #t = Nt
    for t in Base.OneTo(sslen)
        scrollR2L(ss, [ss.B[sslen-t+1]])
        tau = Nt - t*Ng + 1
        #g0t = g(tau, Nt)
        #gt0 = g(Nt, tau)
        g0t, gt0 = ueq_green_scratch(ss)
        Gf0t[tau, :, :] = g0t
        Gft0[tau, :, :] = gt0
    end
    #填充
    for t in Base.OneTo(sslen)
        #把L中的内容转回R
        scrollL2R(ss, [ss.B[t]])
        #
        g0t = Gf0t[(t-1)*Ng + 1, :, :]
        gt0 = Gft0[(t-1)*Ng + 1, :, :]
        for gi in Base.OneTo(Ng-1)
            tau = (t-1)*Ng + gi + 1
            gt0 = allbmats[tau-1] * gt0
            g0t = g0t * inv(allbmats[tau-1])
            Gf0t[tau, :, :] = g0t
            Gft0[tau, :, :] = gt0
        end
    end
    return Gf0t, Gft0
end


#=
comments
#测试gf和bmats
    #println(bidxs)
    #gf19 = allbmats[20]
    #for i in Base.OneTo(19)
    #    gf19 = allbmats[i]*gf19
    #end
    #gf2 = inv(Diagonal(ones(4))+gf19)
    #println(gf2)
    #println(down_prog_green(gf, allbmats[20]))
    #println(gf2 - down_prog_green(gf, bmats[end]))
    #b1 = Diagonal(ones(4))
    #for i in Base.OneTo(Nt)
    #    b1 = bmat_Ising(hscfg[i, :])*b1
    #end
    #println(b1)
    #println(ss.F[end].U*Diagonal(ss.F[end].S)*ss.F[end].Vt)
    #println(hscfg)
    #return
#测试gf的局部更新
    ohs = hscfg[19, 1]
    nhs = -ohs
    dmat = Δmat_Ising(1, nhs, ohs)
    ohs = hscfg[19, 2]
    nhs = -ohs
    dmat += Δmat_Ising(2, nhs, ohs)
    #
    cfg = hscfg[19, :]
    cfg[1] = -cfg[1]
    cfg[2] = -cfg[2]
    newbm = bmat_Ising(cfg)
    println(newbm)
    dmat = Diagonal(rand(4).-0.5)
    bm2 = (Diagonal(ones(4))+dmat)*allbmats[19]
    #println(bm2)
    #return
    allbmats[19] = bm2
    newgf = rawgf(allbmats, 19)
    ##Sherman-Morrison只能对一个奏效
    println(dmat)
    #tmp = dmat*(Diagonal(ones(4))-gf)
    #gfupdated = gf - gf*tmp/(1+tr(tmp))
    gfupdated = ShermanMorrison(dmat, gf)
    println(newgf)
    println(gfupdated)
    return
#测试ratio
    dpre = det(Diagonal(ones(4))+prod(reverse(allbmats)))
    dmat = Diagonal(rand(4).-0.5)
    bm2 = (Diagonal(ones(4))+dmat)*allbmats[19]
    allbmats[19] = bm2
    dpst = det(Diagonal(ones(4))+prod(reverse(allbmats)))
    newgf = rawgf(allbmats, 19)
    gfupdated = ShermanMorrison(dmat, gf)
    println(newgf)
    println(gfupdated)
    println(dpst/dpre)
    println(Woodbury(dmat, gf))
    return
=#



"""
初始化ss
"""
function initialize_SS_Gauge1(Nt, Ng, sp, hscfg, ra, ia, rb, ib)
    sslen = Int(Nt//Ng)
    bmats2 = Vector{Matrix{ComplexF64}}(undef, Ng)
    allbmats2 = Vector{Matrix{ComplexF64}}(undef, Nt)
    for bi in Base.OneTo(Ng)
        cfg = hscfg[bi, :]
        ehk, Ui = unpack_splitting(sp, bi)
        bmats2[bi] = bmat_Gauge1ND(ehk, cfg, Ui, ra, ia, rb, ib, sp.dt)
        allbmats2[bi] = bmats2[bi]
    end
    ss2 = ScrollSVD(bmats2)
    for si in Base.OneTo(sslen-1)
        for bi in Base.OneTo(Ng)
            cfg = hscfg[bi+si*Ng, :]
            ehk, Ui = unpack_splitting(sp, bi+si*Ng)
            bmats2[bi] = bmat_Gauge1ND(ehk, cfg, Ui, ra, ia, rb, ib, sp.dt)
            allbmats2[bi+si*Ng] = bmats2[bi]
        end
        push!(ss2, bmats2)
    end
    return sslen, bmats2, allbmats2, ss2
end


"""
DQMC计算平均的符号
"""
function dqmc_step_Gauge1(Nt, Ng, sp, hscfg, ra, ia, rb, ib, sslen, bmats, allbmats, ss)
    Nx = length(ra)
    #
    gf, ph = eq_green_scratch(ss)
    # TODO: phase需要考虑进来
    for it in Base.OneTo(Nt)
        for ix in Base.OneTo(Nx)
            pval = rb[ix] + im*ib[ix]
            if hscfg[it, ix] == -1
                ph = ph * pval / abs(pval)
            else
                ph = ph * (1-pval) / abs(1-pval)
            end
        end
    end
    #allbidxs, isR2L = qmcstatus(Nt)
    #R->L
    for si in Base.OneTo(sslen)
        for bi in Base.OneTo(Ng)
            tau = Nt - (Ng*(si-1)+bi) + 1
            cfg = hscfg[tau, :]
            ehk, Ui = unpack_splitting(sp, tau)
            for xi in Base.OneTo(Nx)
                if abs(Ui[xi]) < 1e-6
                    continue
                end
                sp1 = cfg[xi]
                #println(tau, " ", xi, " ", sp1)
                idx, dmat, dwgt = Δmat_Gauge1ND(length(cfg), xi, -sp1, sp1,
                Ui[xi], ra[xi], ia[xi], rb[xi], ib[xi], sp.dt)
                #
                ratio = Woodbury(idx, dmat, gf)
                ratio = ratio * dwgt
                ##如果需要update
                if rand() < abs(ratio)
                    PressShermanMorrison!(idx, dmat, gf)
                    ph = ph*ratio/abs(ratio)
                    cfg[xi] = -sp1
                    hscfg[tau, xi] = -sp1
                end
            end
            #重新计算更新过的
            bmats[Ng-bi+1] = bmat_Gauge1ND(ehk, cfg, Ui, ra, ia, rb, ib, sp.dt)
            allbmats[tau] = bmats[Ng-bi+1]
            gf = down_prog_green(gf, allbmats[tau])
            #
            #allbidxs, isR2L = qmcstatus(Nt, allbidxs, isR2L)
            #println(allbidxs, isR2L)
        end
        scrollR2L(ss, bmats)
        #oldgf, oldph = gf, ph
        newgf, newph = eq_green_scratch(ss)
        for it in Base.OneTo(Nt)
            for ix in Base.OneTo(Nx)
                pval = rb[ix] + im*ib[ix]
                if hscfg[it, ix] == -1
                    newph = newph * pval / abs(pval)
                else
                    newph = newph * (1-pval) / abs(1-pval)
                end
            end
        end
        #println(si*Ng, " ", maximum(abs.(gf - newgf)))
        #println(gf - oldgf)
        #println(newph, " ", ph, " ", abs(newph - ph))
        #@assert all(isapprox.(gf, oldgf, atol=1e-6))
        #@assert isapprox(oldph, ph, atol=1e-6)
        @← gf newgf
        @← ph newph
    end
    #L->R
    for si in Base.OneTo(sslen)
        for bi in Base.OneTo(Ng)
            tau = Ng*(si-1)+bi
            cfg = hscfg[tau, :]
            ehk, Ui = unpack_splitting(sp, tau)
            gf = up_prog_green(gf, allbmats[tau])
            for xi in Base.OneTo(Nx)
                if abs(Ui[xi]) < 1e-6
                    continue
                end
                sp1 = cfg[xi]
                idx, dmat, dwgt = Δmat_Gauge1ND(length(cfg), xi, -sp1, sp1,
                Ui[xi], ra[xi], ia[xi], rb[xi], ib[xi], sp.dt)
                #
                ratio = Woodbury(idx, dmat, gf)
                ratio = ratio * dwgt
                ##如果需要update
                if rand() < abs(ratio)
                    PressShermanMorrison!(idx, dmat, gf)
                    ph = ph*ratio/abs(ratio)
                    cfg[xi] = -sp1
                    hscfg[tau, xi] = -sp1
                end
            end
            #重新计算B矩阵
            bmats[bi] = bmat_Gauge1ND(ehk, cfg, Ui, ra, ia, rb, ib, sp.dt)
            allbmats[tau] = bmats[bi]
            #allbidxs, isR2L = qmcstatus(Nt, allbidxs, isR2L)
            #println(allbidxs, isR2L)
        end
        scrollL2R(ss, bmats)
        #oldgf, oldph = gf, ph
        newgf, newph = eq_green_scratch(ss)
        for it in Base.OneTo(Nt)
            for ix in Base.OneTo(Nx)
                pval = rb[ix] + im*ib[ix]
                if hscfg[it, ix] == -1
                    newph = newph * pval / abs(pval)
                else
                    newph = newph * (1-pval) / abs(1-pval)
                end
            end
        end
        #println(si*Ng, " ", maximum(abs.(gf - newgf)))
        #println(newph, " ", ph, " ", abs(newph - ph))
        #println(ph, newph)
        @← gf newgf
        @← ph newph
        #@assert all(isapprox.(gf, oldgf, atol=1e-6))
        #@assert isapprox(oldph, ph, atol=1e-6)
    end
    return hscfg, bmats, allbmats, ss, gf, ph
end


"""
初始化ss
"""
function initialize_SS_Quad1(Nt, Ng, sp, hscfg, ϕs)
    sslen = Int(Nt//Ng)
    bmats2 = Vector{Matrix{ComplexF64}}(undef, Ng)
    allbmats2 = Vector{Matrix{ComplexF64}}(undef, Nt)
    for bi in Base.OneTo(Ng)
        cfg = hscfg[bi, :]
        ehk, Ui = unpack_splitting(sp, bi)
        bmats2[bi] = bmat_Quad1ND(ehk, cfg, Ui, ϕs, sp.dt)
        allbmats2[bi] = bmats2[bi]
    end
    ss2 = ScrollSVD(bmats2)
    for si in Base.OneTo(sslen-1)
        for bi in Base.OneTo(Ng)
            cfg = hscfg[bi+si*Ng, :]
            ehk, Ui = unpack_splitting(sp, bi+si*Ng)
            bmats2[bi] = bmat_Quad1ND(ehk, cfg, Ui, ϕs, sp.dt)
            allbmats2[bi+si*Ng] = bmats2[bi]
        end
        push!(ss2, bmats2)
    end
    return sslen, bmats2, allbmats2, ss2
end


"""
包含phase的抽样
``````
在Woodbury计算完det的ratio以后还需要比较行列式前面的比例
TODO: 1) 计算权重 2) 翻转的概率
"""
function dqmc_step_Quad1(Nt, Ng, sp, hscfg, ϕs, sslen, bmats, allbmats, ss)
    Nx = length(ϕs)
    #
    gf, ph = eq_green_scratch(ss)
    #allbidxs, isR2L = qmcstatus(Nt)
    #R->L
    for si in Base.OneTo(sslen)
        for bi in Base.OneTo(Ng)
            tau = Nt - (Ng*(si-1)+bi) + 1
            cfg = hscfg[tau, :]
            ehk, Ui = unpack_splitting(sp, tau)
            for xi in Base.OneTo(Nx)
                if abs(Ui[xi]) < 1e-6
                    continue
                end
                sp1 = cfg[xi]
                #变成0 1 2 3
                proposal_sp = sp1 > 0 ? sp1+1 : sp1+2
                proposal_sp = mod(proposal_sp+ceil(3*rand()), 4)
                proposal_sp = proposal_sp > 1 ? proposal_sp-1 : proposal_sp-2
                #println(tau, " ", xi, " ", sp1)
                idx, dmat, dwgt = Δmat_Quad1ND(Nx, xi, proposal_sp, sp1,
                Ui[xi], ϕs[xi], sp.dt)
                #
                ratio = Woodbury(idx, dmat, gf)
                ratio = ratio * dwgt
                ##如果需要update
                if rand() < abs(ratio)
                    PressShermanMorrison!(idx, dmat, gf)
                    ph = ph*ratio/abs(ratio)
                    cfg[xi] = proposal_sp
                    hscfg[tau, xi] = proposal_sp
                end
            end
            #重新计算更新过的
            bmats[Ng-bi+1] = bmat_Quad1ND(ehk, cfg, Ui, ϕs, sp.dt)
            allbmats[tau] = bmats[Ng-bi+1]
            gf = down_prog_green(gf, allbmats[tau])
        end
        scrollR2L(ss, bmats)
        #oldgf, oldph = gf, ph
        newgf, newph = eq_green_scratch(ss)
        #println(si*Ng, " ", maximum(abs.(gf - newgf)))
        #println(gf - gf)
        #@assert all(isapprox.(gf, oldgf, atol=1e-6))
        #@assert isapprox(oldph, ph, atol=1e-6)
        @← gf newgf
        @← ph newph
    end
    #L->R
    for si in Base.OneTo(sslen)
        for bi in Base.OneTo(Ng)
            tau = Ng*(si-1)+bi
            cfg = hscfg[tau, :]
            ehk, Ui = unpack_splitting(sp, tau)
            gf = up_prog_green(gf, allbmats[tau])
            for xi in Base.OneTo(Nx)
                if abs(Ui[xi]) < 1e-6
                    continue
                end
                sp1 = cfg[xi]
                #变成0 1 2 3
                proposal_sp = sp1 > 0 ? sp1+1 : sp1+2
                proposal_sp = mod(proposal_sp+ceil(3*rand()), 4)
                proposal_sp = proposal_sp > 1 ? proposal_sp-1 : proposal_sp-2
                idx, dmat, dwgt = Δmat_Quad1ND(Nx, xi, proposal_sp, sp1,
                Ui[xi], ϕs[xi], sp.dt)
                #
                ratio = Woodbury(idx, dmat, gf)
                ratio = ratio * dwgt
                ##如果需要update
                if rand() < abs(ratio)
                    PressShermanMorrison!(idx, dmat, gf)
                    ph = ph*ratio/abs(ratio)
                    cfg[xi] = proposal_sp
                    hscfg[tau, xi] = proposal_sp
                end
            end
            #重新计算B矩阵
            bmats[bi] = bmat_Quad1ND(ehk, cfg, Ui, ϕs, sp.dt)
            allbmats[tau] = bmats[bi]
        end
        scrollL2R(ss, bmats)
        #oldgf, oldph = gf, ph
        newgf, newph = eq_green_scratch(ss)
        #println(si*Ng, " ", maximum(abs.(gf - newgf)))
        #println(newph, " ", ph, " ", abs(newph - ph))
        @← gf newgf
        @← ph newph
        #@assert all(isapprox.(gf, oldgf, atol=1e-6))
        #@assert isapprox(oldph, ph, atol=1e-6)
    end
    return hscfg, bmats, allbmats, ss, gf, ph
end


"""
初始化ss
"""
function initialize_SS_Quad2(Nt, Ng, sp, hscfg, ϕs)
    sslen = Int(Nt//Ng)
    bmats2 = Vector{Matrix{ComplexF64}}(undef, Ng)
    allbmats2 = Vector{Matrix{ComplexF64}}(undef, Nt)
    for bi in Base.OneTo(Ng)
        cfg = hscfg[bi, :]
        ehk, Ui = unpack_splitting(sp, bi)
        bmats2[bi] = bmat_Quad2ND(ehk, cfg, Ui, ϕs, sp.dt)
        allbmats2[bi] = bmats2[bi]
    end
    ss2 = ScrollSVD(bmats2)
    for si in Base.OneTo(sslen-1)
        for bi in Base.OneTo(Ng)
            cfg = hscfg[bi+si*Ng, :]
            ehk, Ui = unpack_splitting(sp, bi+si*Ng)
            bmats2[bi] = bmat_Quad2ND(ehk, cfg, Ui, ϕs, sp.dt)
            allbmats2[bi+si*Ng] = bmats2[bi]
        end
        push!(ss2, bmats2)
    end
    return sslen, bmats2, allbmats2, ss2
end


"""
包含phase的抽样
``````
在Woodbury计算完det的ratio以后还需要比较行列式前面的比例
TODO: 1) 计算权重 2) 翻转的概率
"""
function dqmc_step_Quad2(Nt, Ng, sp, hscfg, ϕs, sslen, bmats, allbmats, ss)
    ηdict = Dict(
        -2 => -3.301360247771569, 
        -1 => -1.049295246550581,
        1 => 1.049295246550581,
        2 => 3.301360247771569
    )
    Nx = length(ϕs)
    # TODO: 权重带来的额外phase需要考虑进去
    # TODO: 这里的d lnZ真的是0么？
    gf, ph = eq_green_scratch(ss)
    for it in Base.OneTo(Nt)
        _, Ui = unpack_splitting(sp, it)
        for ix in Base.OneTo(Nx)
            sqrtma = sqrt(complex(-sp.dt*Ui[ix]/2, 0.0))
            wgtp = exp(sqrtma*ηdict[hscfg[it, ix]]*(ϕs[ix]-1.0))
            ph = ph * wgtp / abs(wgtp)
        end
    end
    #allbidxs, isR2L = qmcstatus(Nt)
    #R->L
    for si in Base.OneTo(sslen)
        for bi in Base.OneTo(Ng)
            tau = Nt - (Ng*(si-1)+bi) + 1
            cfg = hscfg[tau, :]
            ehk, Ui = unpack_splitting(sp, tau)
            for xi in Base.OneTo(Nx)
                if abs(Ui[xi]) < 1e-6
                    continue
                end
                sp1 = cfg[xi]
                #变成0 1 2 3
                proposal_sp = sp1 > 0 ? sp1+1 : sp1+2
                proposal_sp = mod(proposal_sp+ceil(3*rand()), 4)
                proposal_sp = proposal_sp > 1 ? proposal_sp-1 : proposal_sp-2
                #println(tau, " ", xi, " ", sp1)
                idx, dmat, dwgt = Δmat_Quad2ND(Nx, xi, proposal_sp, sp1,
                Ui[xi], ϕs[xi], sp.dt)
                #
                ratio = Woodbury(idx, dmat, gf)
                ratio = ratio * dwgt
                ##如果需要update
                if rand() < abs(ratio)
                    PressShermanMorrison!(idx, dmat, gf)
                    ph = ph*ratio/abs(ratio)
                    cfg[xi] = proposal_sp
                    hscfg[tau, xi] = proposal_sp
                end
            end
            #重新计算更新过的
            bmats[Ng-bi+1] = bmat_Quad2ND(ehk, cfg, Ui, ϕs, sp.dt)
            allbmats[tau] = bmats[Ng-bi+1]
            gf = down_prog_green(gf, allbmats[tau])
        end
        scrollR2L(ss, bmats)
        #oldgf, oldph = gf, ph
        newgf, newph = eq_green_scratch(ss)
        for it in Base.OneTo(Nt)
            _, Ui = unpack_splitting(sp, it)
            for ix in Base.OneTo(Nx)
                sqrtma = sqrt(complex(-sp.dt*Ui[ix]/2, 0.0))
                wgtp = exp(sqrtma*ηdict[hscfg[it, ix]]*(ϕs[ix]-1.0))
                newph = newph * wgtp / abs(wgtp)
            end
        end
        #println(si*Ng, " ", maximum(abs.(gf - newgf)))
        #println(si*Ng, " ", abs.(newph - ph))
        #println(gf - gf)
        #@assert all(isapprox.(gf, oldgf, atol=1e-6))
        #@assert isapprox(oldph, ph, atol=1e-6)
        @← gf newgf
        @← ph newph
    end
    #L->R
    for si in Base.OneTo(sslen)
        for bi in Base.OneTo(Ng)
            tau = Ng*(si-1)+bi
            cfg = hscfg[tau, :]
            ehk, Ui = unpack_splitting(sp, tau)
            gf = up_prog_green(gf, allbmats[tau])
            for xi in Base.OneTo(Nx)
                if abs(Ui[xi]) < 1e-6
                    continue
                end
                sp1 = cfg[xi]
                #变成0 1 2 3
                proposal_sp = sp1 > 0 ? sp1+1 : sp1+2
                proposal_sp = mod(proposal_sp+ceil(3*rand()), 4)
                proposal_sp = proposal_sp > 1 ? proposal_sp-1 : proposal_sp-2
                idx, dmat, dwgt = Δmat_Quad2ND(Nx, xi, proposal_sp, sp1,
                Ui[xi], ϕs[xi], sp.dt)
                #
                ratio = Woodbury(idx, dmat, gf)
                ratio = ratio * dwgt
                ##如果需要update
                if rand() < abs(ratio)
                    PressShermanMorrison!(idx, dmat, gf)
                    ph = ph*ratio/abs(ratio)
                    cfg[xi] = proposal_sp
                    hscfg[tau, xi] = proposal_sp
                end
            end
            #重新计算B矩阵
            bmats[bi] = bmat_Quad2ND(ehk, cfg, Ui, ϕs, sp.dt)
            allbmats[tau] = bmats[bi]
        end
        scrollL2R(ss, bmats)
        #oldgf, oldph = gf, ph
        newgf, newph = eq_green_scratch(ss)
        for it in Base.OneTo(Nt)
            _, Ui = unpack_splitting(sp, it)
            for ix in Base.OneTo(Nx)
                sqrtma = sqrt(complex(-sp.dt*Ui[ix]/2, 0.0))
                wgtp = exp(sqrtma*ηdict[hscfg[it, ix]]*(ϕs[ix]-1.0))
                newph = newph * wgtp / abs(wgtp)
            end
        end
        #println(si*Ng, " ", maximum(abs.(gf - newgf)))
        #println(newph, " ", ph, " ", abs(newph - ph))
        @← gf newgf
        @← ph newph
        #@assert all(isapprox.(gf, oldgf, atol=1e-6))
        #@assert isapprox(oldph, ph, atol=1e-6)
    end
    return hscfg, bmats, allbmats, ss, gf, ph
end

